{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "e04e415a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "65aa1fb9",
   "metadata": {},
   "outputs": [],
   "source": [
    "data = pd.read_csv('data/movie_metadata.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "7db6bbcf",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>director_name</th>\n",
       "      <th>num_critic_for_reviews</th>\n",
       "      <th>duration</th>\n",
       "      <th>director_facebook_likes</th>\n",
       "      <th>actor_3_facebook_likes</th>\n",
       "      <th>actor_2_name</th>\n",
       "      <th>actor_1_facebook_likes</th>\n",
       "      <th>gross</th>\n",
       "      <th>genres</th>\n",
       "      <th>...</th>\n",
       "      <th>num_user_for_reviews</th>\n",
       "      <th>language</th>\n",
       "      <th>country</th>\n",
       "      <th>content_rating</th>\n",
       "      <th>budget</th>\n",
       "      <th>title_year</th>\n",
       "      <th>actor_2_facebook_likes</th>\n",
       "      <th>imdb_score</th>\n",
       "      <th>aspect_ratio</th>\n",
       "      <th>movie_facebook_likes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Color</td>\n",
       "      <td>James Cameron</td>\n",
       "      <td>723.0</td>\n",
       "      <td>178.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>855.0</td>\n",
       "      <td>Joel David Moore</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>760505847.0</td>\n",
       "      <td>Action|Adventure|Fantasy|Sci-Fi</td>\n",
       "      <td>...</td>\n",
       "      <td>3054.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>237000000.0</td>\n",
       "      <td>2009.0</td>\n",
       "      <td>936.0</td>\n",
       "      <td>7.9</td>\n",
       "      <td>1.78</td>\n",
       "      <td>33000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Color</td>\n",
       "      <td>Gore Verbinski</td>\n",
       "      <td>302.0</td>\n",
       "      <td>169.0</td>\n",
       "      <td>563.0</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>Orlando Bloom</td>\n",
       "      <td>40000.0</td>\n",
       "      <td>309404152.0</td>\n",
       "      <td>Action|Adventure|Fantasy</td>\n",
       "      <td>...</td>\n",
       "      <td>1238.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>300000000.0</td>\n",
       "      <td>2007.0</td>\n",
       "      <td>5000.0</td>\n",
       "      <td>7.1</td>\n",
       "      <td>2.35</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Color</td>\n",
       "      <td>Sam Mendes</td>\n",
       "      <td>602.0</td>\n",
       "      <td>148.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>161.0</td>\n",
       "      <td>Rory Kinnear</td>\n",
       "      <td>11000.0</td>\n",
       "      <td>200074175.0</td>\n",
       "      <td>Action|Adventure|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>994.0</td>\n",
       "      <td>English</td>\n",
       "      <td>UK</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>245000000.0</td>\n",
       "      <td>2015.0</td>\n",
       "      <td>393.0</td>\n",
       "      <td>6.8</td>\n",
       "      <td>2.35</td>\n",
       "      <td>85000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Color</td>\n",
       "      <td>Christopher Nolan</td>\n",
       "      <td>813.0</td>\n",
       "      <td>164.0</td>\n",
       "      <td>22000.0</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>Christian Bale</td>\n",
       "      <td>27000.0</td>\n",
       "      <td>448130642.0</td>\n",
       "      <td>Action|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>2701.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>250000000.0</td>\n",
       "      <td>2012.0</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>8.5</td>\n",
       "      <td>2.35</td>\n",
       "      <td>164000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>NaN</td>\n",
       "      <td>Doug Walker</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>131.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Rob Walker</td>\n",
       "      <td>131.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Documentary</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>12.0</td>\n",
       "      <td>7.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 28 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   color      director_name  num_critic_for_reviews  duration  \\\n",
       "0  Color      James Cameron                   723.0     178.0   \n",
       "1  Color     Gore Verbinski                   302.0     169.0   \n",
       "2  Color         Sam Mendes                   602.0     148.0   \n",
       "3  Color  Christopher Nolan                   813.0     164.0   \n",
       "4    NaN        Doug Walker                     NaN       NaN   \n",
       "\n",
       "   director_facebook_likes  actor_3_facebook_likes      actor_2_name  \\\n",
       "0                      0.0                   855.0  Joel David Moore   \n",
       "1                    563.0                  1000.0     Orlando Bloom   \n",
       "2                      0.0                   161.0      Rory Kinnear   \n",
       "3                  22000.0                 23000.0    Christian Bale   \n",
       "4                    131.0                     NaN        Rob Walker   \n",
       "\n",
       "   actor_1_facebook_likes        gross                           genres  ...  \\\n",
       "0                  1000.0  760505847.0  Action|Adventure|Fantasy|Sci-Fi  ...   \n",
       "1                 40000.0  309404152.0         Action|Adventure|Fantasy  ...   \n",
       "2                 11000.0  200074175.0        Action|Adventure|Thriller  ...   \n",
       "3                 27000.0  448130642.0                  Action|Thriller  ...   \n",
       "4                   131.0          NaN                      Documentary  ...   \n",
       "\n",
       "  num_user_for_reviews language  country  content_rating       budget  \\\n",
       "0               3054.0  English      USA           PG-13  237000000.0   \n",
       "1               1238.0  English      USA           PG-13  300000000.0   \n",
       "2                994.0  English       UK           PG-13  245000000.0   \n",
       "3               2701.0  English      USA           PG-13  250000000.0   \n",
       "4                  NaN      NaN      NaN             NaN          NaN   \n",
       "\n",
       "   title_year actor_2_facebook_likes imdb_score  aspect_ratio  \\\n",
       "0      2009.0                  936.0        7.9          1.78   \n",
       "1      2007.0                 5000.0        7.1          2.35   \n",
       "2      2015.0                  393.0        6.8          2.35   \n",
       "3      2012.0                23000.0        8.5          2.35   \n",
       "4         NaN                   12.0        7.1           NaN   \n",
       "\n",
       "  movie_facebook_likes  \n",
       "0                33000  \n",
       "1                    0  \n",
       "2                85000  \n",
       "3               164000  \n",
       "4                    0  \n",
       "\n",
       "[5 rows x 28 columns]"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "41ac003e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0       178.0\n",
       "1       169.0\n",
       "2       148.0\n",
       "3       164.0\n",
       "4         NaN\n",
       "        ...  \n",
       "5038     87.0\n",
       "5039     43.0\n",
       "5040     76.0\n",
       "5041    100.0\n",
       "5042     90.0\n",
       "Name: duration, Length: 5043, dtype: float64"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.duration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "29ae8c2d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>num_critic_for_reviews</th>\n",
       "      <th>duration</th>\n",
       "      <th>director_facebook_likes</th>\n",
       "      <th>actor_3_facebook_likes</th>\n",
       "      <th>actor_1_facebook_likes</th>\n",
       "      <th>gross</th>\n",
       "      <th>num_voted_users</th>\n",
       "      <th>cast_total_facebook_likes</th>\n",
       "      <th>facenumber_in_poster</th>\n",
       "      <th>num_user_for_reviews</th>\n",
       "      <th>budget</th>\n",
       "      <th>title_year</th>\n",
       "      <th>actor_2_facebook_likes</th>\n",
       "      <th>imdb_score</th>\n",
       "      <th>aspect_ratio</th>\n",
       "      <th>movie_facebook_likes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>4993.000000</td>\n",
       "      <td>5028.000000</td>\n",
       "      <td>4939.000000</td>\n",
       "      <td>5020.000000</td>\n",
       "      <td>5036.000000</td>\n",
       "      <td>4.159000e+03</td>\n",
       "      <td>5.043000e+03</td>\n",
       "      <td>5043.000000</td>\n",
       "      <td>5030.000000</td>\n",
       "      <td>5022.000000</td>\n",
       "      <td>4.551000e+03</td>\n",
       "      <td>4935.000000</td>\n",
       "      <td>5030.000000</td>\n",
       "      <td>5043.000000</td>\n",
       "      <td>4714.000000</td>\n",
       "      <td>5043.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>140.194272</td>\n",
       "      <td>107.201074</td>\n",
       "      <td>686.509212</td>\n",
       "      <td>645.009761</td>\n",
       "      <td>6560.047061</td>\n",
       "      <td>4.846841e+07</td>\n",
       "      <td>8.366816e+04</td>\n",
       "      <td>9699.063851</td>\n",
       "      <td>1.371173</td>\n",
       "      <td>272.770808</td>\n",
       "      <td>3.975262e+07</td>\n",
       "      <td>2002.470517</td>\n",
       "      <td>1651.754473</td>\n",
       "      <td>6.442138</td>\n",
       "      <td>2.220403</td>\n",
       "      <td>7525.964505</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>121.601675</td>\n",
       "      <td>25.197441</td>\n",
       "      <td>2813.328607</td>\n",
       "      <td>1665.041728</td>\n",
       "      <td>15020.759120</td>\n",
       "      <td>6.845299e+07</td>\n",
       "      <td>1.384853e+05</td>\n",
       "      <td>18163.799124</td>\n",
       "      <td>2.013576</td>\n",
       "      <td>377.982886</td>\n",
       "      <td>2.061149e+08</td>\n",
       "      <td>12.474599</td>\n",
       "      <td>4042.438863</td>\n",
       "      <td>1.125116</td>\n",
       "      <td>1.385113</td>\n",
       "      <td>19320.445110</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.620000e+02</td>\n",
       "      <td>5.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.180000e+02</td>\n",
       "      <td>1916.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.600000</td>\n",
       "      <td>1.180000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>50.000000</td>\n",
       "      <td>93.000000</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>133.000000</td>\n",
       "      <td>614.000000</td>\n",
       "      <td>5.340988e+06</td>\n",
       "      <td>8.593500e+03</td>\n",
       "      <td>1411.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>65.000000</td>\n",
       "      <td>6.000000e+06</td>\n",
       "      <td>1999.000000</td>\n",
       "      <td>281.000000</td>\n",
       "      <td>5.800000</td>\n",
       "      <td>1.850000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>110.000000</td>\n",
       "      <td>103.000000</td>\n",
       "      <td>49.000000</td>\n",
       "      <td>371.500000</td>\n",
       "      <td>988.000000</td>\n",
       "      <td>2.551750e+07</td>\n",
       "      <td>3.435900e+04</td>\n",
       "      <td>3090.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>156.000000</td>\n",
       "      <td>2.000000e+07</td>\n",
       "      <td>2005.000000</td>\n",
       "      <td>595.000000</td>\n",
       "      <td>6.600000</td>\n",
       "      <td>2.350000</td>\n",
       "      <td>166.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>195.000000</td>\n",
       "      <td>118.000000</td>\n",
       "      <td>194.500000</td>\n",
       "      <td>636.000000</td>\n",
       "      <td>11000.000000</td>\n",
       "      <td>6.230944e+07</td>\n",
       "      <td>9.630900e+04</td>\n",
       "      <td>13756.500000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>326.000000</td>\n",
       "      <td>4.500000e+07</td>\n",
       "      <td>2011.000000</td>\n",
       "      <td>918.000000</td>\n",
       "      <td>7.200000</td>\n",
       "      <td>2.350000</td>\n",
       "      <td>3000.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>813.000000</td>\n",
       "      <td>511.000000</td>\n",
       "      <td>23000.000000</td>\n",
       "      <td>23000.000000</td>\n",
       "      <td>640000.000000</td>\n",
       "      <td>7.605058e+08</td>\n",
       "      <td>1.689764e+06</td>\n",
       "      <td>656730.000000</td>\n",
       "      <td>43.000000</td>\n",
       "      <td>5060.000000</td>\n",
       "      <td>1.221550e+10</td>\n",
       "      <td>2016.000000</td>\n",
       "      <td>137000.000000</td>\n",
       "      <td>9.500000</td>\n",
       "      <td>16.000000</td>\n",
       "      <td>349000.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       num_critic_for_reviews     duration  director_facebook_likes  \\\n",
       "count             4993.000000  5028.000000              4939.000000   \n",
       "mean               140.194272   107.201074               686.509212   \n",
       "std                121.601675    25.197441              2813.328607   \n",
       "min                  1.000000     7.000000                 0.000000   \n",
       "25%                 50.000000    93.000000                 7.000000   \n",
       "50%                110.000000   103.000000                49.000000   \n",
       "75%                195.000000   118.000000               194.500000   \n",
       "max                813.000000   511.000000             23000.000000   \n",
       "\n",
       "       actor_3_facebook_likes  actor_1_facebook_likes         gross  \\\n",
       "count             5020.000000             5036.000000  4.159000e+03   \n",
       "mean               645.009761             6560.047061  4.846841e+07   \n",
       "std               1665.041728            15020.759120  6.845299e+07   \n",
       "min                  0.000000                0.000000  1.620000e+02   \n",
       "25%                133.000000              614.000000  5.340988e+06   \n",
       "50%                371.500000              988.000000  2.551750e+07   \n",
       "75%                636.000000            11000.000000  6.230944e+07   \n",
       "max              23000.000000           640000.000000  7.605058e+08   \n",
       "\n",
       "       num_voted_users  cast_total_facebook_likes  facenumber_in_poster  \\\n",
       "count     5.043000e+03                5043.000000           5030.000000   \n",
       "mean      8.366816e+04                9699.063851              1.371173   \n",
       "std       1.384853e+05               18163.799124              2.013576   \n",
       "min       5.000000e+00                   0.000000              0.000000   \n",
       "25%       8.593500e+03                1411.000000              0.000000   \n",
       "50%       3.435900e+04                3090.000000              1.000000   \n",
       "75%       9.630900e+04               13756.500000              2.000000   \n",
       "max       1.689764e+06              656730.000000             43.000000   \n",
       "\n",
       "       num_user_for_reviews        budget   title_year  \\\n",
       "count           5022.000000  4.551000e+03  4935.000000   \n",
       "mean             272.770808  3.975262e+07  2002.470517   \n",
       "std              377.982886  2.061149e+08    12.474599   \n",
       "min                1.000000  2.180000e+02  1916.000000   \n",
       "25%               65.000000  6.000000e+06  1999.000000   \n",
       "50%              156.000000  2.000000e+07  2005.000000   \n",
       "75%              326.000000  4.500000e+07  2011.000000   \n",
       "max             5060.000000  1.221550e+10  2016.000000   \n",
       "\n",
       "       actor_2_facebook_likes   imdb_score  aspect_ratio  movie_facebook_likes  \n",
       "count             5030.000000  5043.000000   4714.000000           5043.000000  \n",
       "mean              1651.754473     6.442138      2.220403           7525.964505  \n",
       "std               4042.438863     1.125116      1.385113          19320.445110  \n",
       "min                  0.000000     1.600000      1.180000              0.000000  \n",
       "25%                281.000000     5.800000      1.850000              0.000000  \n",
       "50%                595.000000     6.600000      2.350000            166.000000  \n",
       "75%                918.000000     7.200000      2.350000           3000.000000  \n",
       "max             137000.000000     9.500000     16.000000         349000.000000  "
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "de70a5e9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "color                         19\n",
       "director_name                104\n",
       "num_critic_for_reviews        50\n",
       "duration                      15\n",
       "director_facebook_likes      104\n",
       "actor_3_facebook_likes        23\n",
       "actor_2_name                  13\n",
       "actor_1_facebook_likes         7\n",
       "gross                        884\n",
       "genres                         0\n",
       "actor_1_name                   7\n",
       "movie_title                    0\n",
       "num_voted_users                0\n",
       "cast_total_facebook_likes      0\n",
       "actor_3_name                  23\n",
       "facenumber_in_poster          13\n",
       "plot_keywords                153\n",
       "movie_imdb_link                0\n",
       "num_user_for_reviews          21\n",
       "language                      12\n",
       "country                        5\n",
       "content_rating               303\n",
       "budget                       492\n",
       "title_year                   108\n",
       "actor_2_facebook_likes        13\n",
       "imdb_score                     0\n",
       "aspect_ratio                 329\n",
       "movie_facebook_likes           0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.isnull().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "68660480",
   "metadata": {},
   "outputs": [],
   "source": [
    "data.country = data.country.fillna('')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "116c31af",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0          USA\n",
       "1          USA\n",
       "2           UK\n",
       "3          USA\n",
       "4             \n",
       "         ...  \n",
       "5038    Canada\n",
       "5039       USA\n",
       "5040       USA\n",
       "5041       USA\n",
       "5042       USA\n",
       "Name: country, Length: 5043, dtype: object"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.country"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "1d5b1123",
   "metadata": {},
   "outputs": [],
   "source": [
    "data.duration.fillna(data['duration'].mean(),inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "739761a8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0       178.000000\n",
       "1       169.000000\n",
       "2       148.000000\n",
       "3       164.000000\n",
       "4       107.201074\n",
       "           ...    \n",
       "5038     87.000000\n",
       "5039     43.000000\n",
       "5040     76.000000\n",
       "5041    100.000000\n",
       "5042     90.000000\n",
       "Name: duration, Length: 5043, dtype: float64"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data['duration']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "02d2f46a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>director_name</th>\n",
       "      <th>num_critic_for_reviews</th>\n",
       "      <th>duration</th>\n",
       "      <th>director_facebook_likes</th>\n",
       "      <th>actor_3_facebook_likes</th>\n",
       "      <th>actor_2_name</th>\n",
       "      <th>actor_1_facebook_likes</th>\n",
       "      <th>gross</th>\n",
       "      <th>genres</th>\n",
       "      <th>...</th>\n",
       "      <th>num_user_for_reviews</th>\n",
       "      <th>language</th>\n",
       "      <th>country</th>\n",
       "      <th>content_rating</th>\n",
       "      <th>budget</th>\n",
       "      <th>title_year</th>\n",
       "      <th>actor_2_facebook_likes</th>\n",
       "      <th>imdb_score</th>\n",
       "      <th>aspect_ratio</th>\n",
       "      <th>movie_facebook_likes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Color</td>\n",
       "      <td>James Cameron</td>\n",
       "      <td>723.0</td>\n",
       "      <td>178.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>855.0</td>\n",
       "      <td>Joel David Moore</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>760505847.0</td>\n",
       "      <td>Action|Adventure|Fantasy|Sci-Fi</td>\n",
       "      <td>...</td>\n",
       "      <td>3054.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>237000000.0</td>\n",
       "      <td>2009.0</td>\n",
       "      <td>936.0</td>\n",
       "      <td>7.9</td>\n",
       "      <td>1.78</td>\n",
       "      <td>33000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Color</td>\n",
       "      <td>Gore Verbinski</td>\n",
       "      <td>302.0</td>\n",
       "      <td>169.000000</td>\n",
       "      <td>563.0</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>Orlando Bloom</td>\n",
       "      <td>40000.0</td>\n",
       "      <td>309404152.0</td>\n",
       "      <td>Action|Adventure|Fantasy</td>\n",
       "      <td>...</td>\n",
       "      <td>1238.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>300000000.0</td>\n",
       "      <td>2007.0</td>\n",
       "      <td>5000.0</td>\n",
       "      <td>7.1</td>\n",
       "      <td>2.35</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Color</td>\n",
       "      <td>Sam Mendes</td>\n",
       "      <td>602.0</td>\n",
       "      <td>148.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>161.0</td>\n",
       "      <td>Rory Kinnear</td>\n",
       "      <td>11000.0</td>\n",
       "      <td>200074175.0</td>\n",
       "      <td>Action|Adventure|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>994.0</td>\n",
       "      <td>English</td>\n",
       "      <td>UK</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>245000000.0</td>\n",
       "      <td>2015.0</td>\n",
       "      <td>393.0</td>\n",
       "      <td>6.8</td>\n",
       "      <td>2.35</td>\n",
       "      <td>85000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Color</td>\n",
       "      <td>Christopher Nolan</td>\n",
       "      <td>813.0</td>\n",
       "      <td>164.000000</td>\n",
       "      <td>22000.0</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>Christian Bale</td>\n",
       "      <td>27000.0</td>\n",
       "      <td>448130642.0</td>\n",
       "      <td>Action|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>2701.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>250000000.0</td>\n",
       "      <td>2012.0</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>8.5</td>\n",
       "      <td>2.35</td>\n",
       "      <td>164000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>NaN</td>\n",
       "      <td>Doug Walker</td>\n",
       "      <td>NaN</td>\n",
       "      <td>107.201074</td>\n",
       "      <td>131.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Rob Walker</td>\n",
       "      <td>131.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Documentary</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>12.0</td>\n",
       "      <td>7.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Color</td>\n",
       "      <td>Andrew Stanton</td>\n",
       "      <td>462.0</td>\n",
       "      <td>132.000000</td>\n",
       "      <td>475.0</td>\n",
       "      <td>530.0</td>\n",
       "      <td>Samantha Morton</td>\n",
       "      <td>640.0</td>\n",
       "      <td>73058679.0</td>\n",
       "      <td>Action|Adventure|Sci-Fi</td>\n",
       "      <td>...</td>\n",
       "      <td>738.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>263700000.0</td>\n",
       "      <td>2012.0</td>\n",
       "      <td>632.0</td>\n",
       "      <td>6.6</td>\n",
       "      <td>2.35</td>\n",
       "      <td>24000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Color</td>\n",
       "      <td>Sam Raimi</td>\n",
       "      <td>392.0</td>\n",
       "      <td>156.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4000.0</td>\n",
       "      <td>James Franco</td>\n",
       "      <td>24000.0</td>\n",
       "      <td>336530303.0</td>\n",
       "      <td>Action|Adventure|Romance</td>\n",
       "      <td>...</td>\n",
       "      <td>1902.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>258000000.0</td>\n",
       "      <td>2007.0</td>\n",
       "      <td>11000.0</td>\n",
       "      <td>6.2</td>\n",
       "      <td>2.35</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Color</td>\n",
       "      <td>Nathan Greno</td>\n",
       "      <td>324.0</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>15.0</td>\n",
       "      <td>284.0</td>\n",
       "      <td>Donna Murphy</td>\n",
       "      <td>799.0</td>\n",
       "      <td>200807262.0</td>\n",
       "      <td>Adventure|Animation|Comedy|Family|Fantasy|Musi...</td>\n",
       "      <td>...</td>\n",
       "      <td>387.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG</td>\n",
       "      <td>260000000.0</td>\n",
       "      <td>2010.0</td>\n",
       "      <td>553.0</td>\n",
       "      <td>7.8</td>\n",
       "      <td>1.85</td>\n",
       "      <td>29000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Color</td>\n",
       "      <td>Joss Whedon</td>\n",
       "      <td>635.0</td>\n",
       "      <td>141.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>19000.0</td>\n",
       "      <td>Robert Downey Jr.</td>\n",
       "      <td>26000.0</td>\n",
       "      <td>458991599.0</td>\n",
       "      <td>Action|Adventure|Sci-Fi</td>\n",
       "      <td>...</td>\n",
       "      <td>1117.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>250000000.0</td>\n",
       "      <td>2015.0</td>\n",
       "      <td>21000.0</td>\n",
       "      <td>7.5</td>\n",
       "      <td>2.35</td>\n",
       "      <td>118000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Color</td>\n",
       "      <td>David Yates</td>\n",
       "      <td>375.0</td>\n",
       "      <td>153.000000</td>\n",
       "      <td>282.0</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>Daniel Radcliffe</td>\n",
       "      <td>25000.0</td>\n",
       "      <td>301956980.0</td>\n",
       "      <td>Adventure|Family|Fantasy|Mystery</td>\n",
       "      <td>...</td>\n",
       "      <td>973.0</td>\n",
       "      <td>English</td>\n",
       "      <td>UK</td>\n",
       "      <td>PG</td>\n",
       "      <td>250000000.0</td>\n",
       "      <td>2009.0</td>\n",
       "      <td>11000.0</td>\n",
       "      <td>7.5</td>\n",
       "      <td>2.35</td>\n",
       "      <td>10000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10 rows × 28 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   color      director_name  num_critic_for_reviews    duration  \\\n",
       "0  Color      James Cameron                   723.0  178.000000   \n",
       "1  Color     Gore Verbinski                   302.0  169.000000   \n",
       "2  Color         Sam Mendes                   602.0  148.000000   \n",
       "3  Color  Christopher Nolan                   813.0  164.000000   \n",
       "4    NaN        Doug Walker                     NaN  107.201074   \n",
       "5  Color     Andrew Stanton                   462.0  132.000000   \n",
       "6  Color          Sam Raimi                   392.0  156.000000   \n",
       "7  Color       Nathan Greno                   324.0  100.000000   \n",
       "8  Color        Joss Whedon                   635.0  141.000000   \n",
       "9  Color        David Yates                   375.0  153.000000   \n",
       "\n",
       "   director_facebook_likes  actor_3_facebook_likes       actor_2_name  \\\n",
       "0                      0.0                   855.0   Joel David Moore   \n",
       "1                    563.0                  1000.0      Orlando Bloom   \n",
       "2                      0.0                   161.0       Rory Kinnear   \n",
       "3                  22000.0                 23000.0     Christian Bale   \n",
       "4                    131.0                     NaN         Rob Walker   \n",
       "5                    475.0                   530.0    Samantha Morton   \n",
       "6                      0.0                  4000.0       James Franco   \n",
       "7                     15.0                   284.0       Donna Murphy   \n",
       "8                      0.0                 19000.0  Robert Downey Jr.   \n",
       "9                    282.0                 10000.0   Daniel Radcliffe   \n",
       "\n",
       "   actor_1_facebook_likes        gross  \\\n",
       "0                  1000.0  760505847.0   \n",
       "1                 40000.0  309404152.0   \n",
       "2                 11000.0  200074175.0   \n",
       "3                 27000.0  448130642.0   \n",
       "4                   131.0          NaN   \n",
       "5                   640.0   73058679.0   \n",
       "6                 24000.0  336530303.0   \n",
       "7                   799.0  200807262.0   \n",
       "8                 26000.0  458991599.0   \n",
       "9                 25000.0  301956980.0   \n",
       "\n",
       "                                              genres  ...  \\\n",
       "0                    Action|Adventure|Fantasy|Sci-Fi  ...   \n",
       "1                           Action|Adventure|Fantasy  ...   \n",
       "2                          Action|Adventure|Thriller  ...   \n",
       "3                                    Action|Thriller  ...   \n",
       "4                                        Documentary  ...   \n",
       "5                            Action|Adventure|Sci-Fi  ...   \n",
       "6                           Action|Adventure|Romance  ...   \n",
       "7  Adventure|Animation|Comedy|Family|Fantasy|Musi...  ...   \n",
       "8                            Action|Adventure|Sci-Fi  ...   \n",
       "9                   Adventure|Family|Fantasy|Mystery  ...   \n",
       "\n",
       "  num_user_for_reviews language  country  content_rating       budget  \\\n",
       "0               3054.0  English      USA           PG-13  237000000.0   \n",
       "1               1238.0  English      USA           PG-13  300000000.0   \n",
       "2                994.0  English       UK           PG-13  245000000.0   \n",
       "3               2701.0  English      USA           PG-13  250000000.0   \n",
       "4                  NaN      NaN                      NaN          NaN   \n",
       "5                738.0  English      USA           PG-13  263700000.0   \n",
       "6               1902.0  English      USA           PG-13  258000000.0   \n",
       "7                387.0  English      USA              PG  260000000.0   \n",
       "8               1117.0  English      USA           PG-13  250000000.0   \n",
       "9                973.0  English       UK              PG  250000000.0   \n",
       "\n",
       "   title_year actor_2_facebook_likes imdb_score  aspect_ratio  \\\n",
       "0      2009.0                  936.0        7.9          1.78   \n",
       "1      2007.0                 5000.0        7.1          2.35   \n",
       "2      2015.0                  393.0        6.8          2.35   \n",
       "3      2012.0                23000.0        8.5          2.35   \n",
       "4         NaN                   12.0        7.1           NaN   \n",
       "5      2012.0                  632.0        6.6          2.35   \n",
       "6      2007.0                11000.0        6.2          2.35   \n",
       "7      2010.0                  553.0        7.8          1.85   \n",
       "8      2015.0                21000.0        7.5          2.35   \n",
       "9      2009.0                11000.0        7.5          2.35   \n",
       "\n",
       "  movie_facebook_likes  \n",
       "0                33000  \n",
       "1                    0  \n",
       "2                85000  \n",
       "3               164000  \n",
       "4                    0  \n",
       "5                24000  \n",
       "6                    0  \n",
       "7                29000  \n",
       "8               118000  \n",
       "9                10000  \n",
       "\n",
       "[10 rows x 28 columns]"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "fa620242",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>director_name</th>\n",
       "      <th>num_critic_for_reviews</th>\n",
       "      <th>duration</th>\n",
       "      <th>director_facebook_likes</th>\n",
       "      <th>actor_3_facebook_likes</th>\n",
       "      <th>actor_2_name</th>\n",
       "      <th>actor_1_facebook_likes</th>\n",
       "      <th>gross</th>\n",
       "      <th>genres</th>\n",
       "      <th>...</th>\n",
       "      <th>num_user_for_reviews</th>\n",
       "      <th>language</th>\n",
       "      <th>country</th>\n",
       "      <th>content_rating</th>\n",
       "      <th>budget</th>\n",
       "      <th>title_year</th>\n",
       "      <th>actor_2_facebook_likes</th>\n",
       "      <th>imdb_score</th>\n",
       "      <th>aspect_ratio</th>\n",
       "      <th>movie_facebook_likes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Color</td>\n",
       "      <td>James Cameron</td>\n",
       "      <td>723.0</td>\n",
       "      <td>178.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>855.0</td>\n",
       "      <td>Joel David Moore</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>760505847.0</td>\n",
       "      <td>Action|Adventure|Fantasy|Sci-Fi</td>\n",
       "      <td>...</td>\n",
       "      <td>3054.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>237000000.0</td>\n",
       "      <td>2009.0</td>\n",
       "      <td>936.0</td>\n",
       "      <td>7.9</td>\n",
       "      <td>1.78</td>\n",
       "      <td>33000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Color</td>\n",
       "      <td>Gore Verbinski</td>\n",
       "      <td>302.0</td>\n",
       "      <td>169.0</td>\n",
       "      <td>563.0</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>Orlando Bloom</td>\n",
       "      <td>40000.0</td>\n",
       "      <td>309404152.0</td>\n",
       "      <td>Action|Adventure|Fantasy</td>\n",
       "      <td>...</td>\n",
       "      <td>1238.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>300000000.0</td>\n",
       "      <td>2007.0</td>\n",
       "      <td>5000.0</td>\n",
       "      <td>7.1</td>\n",
       "      <td>2.35</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Color</td>\n",
       "      <td>Sam Mendes</td>\n",
       "      <td>602.0</td>\n",
       "      <td>148.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>161.0</td>\n",
       "      <td>Rory Kinnear</td>\n",
       "      <td>11000.0</td>\n",
       "      <td>200074175.0</td>\n",
       "      <td>Action|Adventure|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>994.0</td>\n",
       "      <td>English</td>\n",
       "      <td>UK</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>245000000.0</td>\n",
       "      <td>2015.0</td>\n",
       "      <td>393.0</td>\n",
       "      <td>6.8</td>\n",
       "      <td>2.35</td>\n",
       "      <td>85000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Color</td>\n",
       "      <td>Christopher Nolan</td>\n",
       "      <td>813.0</td>\n",
       "      <td>164.0</td>\n",
       "      <td>22000.0</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>Christian Bale</td>\n",
       "      <td>27000.0</td>\n",
       "      <td>448130642.0</td>\n",
       "      <td>Action|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>2701.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>250000000.0</td>\n",
       "      <td>2012.0</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>8.5</td>\n",
       "      <td>2.35</td>\n",
       "      <td>164000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Color</td>\n",
       "      <td>Andrew Stanton</td>\n",
       "      <td>462.0</td>\n",
       "      <td>132.0</td>\n",
       "      <td>475.0</td>\n",
       "      <td>530.0</td>\n",
       "      <td>Samantha Morton</td>\n",
       "      <td>640.0</td>\n",
       "      <td>73058679.0</td>\n",
       "      <td>Action|Adventure|Sci-Fi</td>\n",
       "      <td>...</td>\n",
       "      <td>738.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>263700000.0</td>\n",
       "      <td>2012.0</td>\n",
       "      <td>632.0</td>\n",
       "      <td>6.6</td>\n",
       "      <td>2.35</td>\n",
       "      <td>24000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5026</th>\n",
       "      <td>Color</td>\n",
       "      <td>Olivier Assayas</td>\n",
       "      <td>81.0</td>\n",
       "      <td>110.0</td>\n",
       "      <td>107.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>Béatrice Dalle</td>\n",
       "      <td>576.0</td>\n",
       "      <td>136007.0</td>\n",
       "      <td>Drama|Music|Romance</td>\n",
       "      <td>...</td>\n",
       "      <td>39.0</td>\n",
       "      <td>French</td>\n",
       "      <td>France</td>\n",
       "      <td>R</td>\n",
       "      <td>4500.0</td>\n",
       "      <td>2004.0</td>\n",
       "      <td>133.0</td>\n",
       "      <td>6.9</td>\n",
       "      <td>2.35</td>\n",
       "      <td>171</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5027</th>\n",
       "      <td>Color</td>\n",
       "      <td>Jafar Panahi</td>\n",
       "      <td>64.0</td>\n",
       "      <td>90.0</td>\n",
       "      <td>397.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Nargess Mamizadeh</td>\n",
       "      <td>5.0</td>\n",
       "      <td>673780.0</td>\n",
       "      <td>Drama</td>\n",
       "      <td>...</td>\n",
       "      <td>26.0</td>\n",
       "      <td>Persian</td>\n",
       "      <td>Iran</td>\n",
       "      <td>Not Rated</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>2000.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7.5</td>\n",
       "      <td>1.85</td>\n",
       "      <td>697</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5033</th>\n",
       "      <td>Color</td>\n",
       "      <td>Shane Carruth</td>\n",
       "      <td>143.0</td>\n",
       "      <td>77.0</td>\n",
       "      <td>291.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>David Sullivan</td>\n",
       "      <td>291.0</td>\n",
       "      <td>424760.0</td>\n",
       "      <td>Drama|Sci-Fi|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>371.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>7000.0</td>\n",
       "      <td>2004.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>1.85</td>\n",
       "      <td>19000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5035</th>\n",
       "      <td>Color</td>\n",
       "      <td>Robert Rodriguez</td>\n",
       "      <td>56.0</td>\n",
       "      <td>81.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>Peter Marquardt</td>\n",
       "      <td>121.0</td>\n",
       "      <td>2040920.0</td>\n",
       "      <td>Action|Crime|Drama|Romance|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>130.0</td>\n",
       "      <td>Spanish</td>\n",
       "      <td>USA</td>\n",
       "      <td>R</td>\n",
       "      <td>7000.0</td>\n",
       "      <td>1992.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>6.9</td>\n",
       "      <td>1.37</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5042</th>\n",
       "      <td>Color</td>\n",
       "      <td>Jon Gunn</td>\n",
       "      <td>43.0</td>\n",
       "      <td>90.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>Brian Herzlinger</td>\n",
       "      <td>86.0</td>\n",
       "      <td>85222.0</td>\n",
       "      <td>Documentary</td>\n",
       "      <td>...</td>\n",
       "      <td>84.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG</td>\n",
       "      <td>1100.0</td>\n",
       "      <td>2004.0</td>\n",
       "      <td>23.0</td>\n",
       "      <td>6.6</td>\n",
       "      <td>1.85</td>\n",
       "      <td>456</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3756 rows × 28 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      color      director_name  num_critic_for_reviews  duration  \\\n",
       "0     Color      James Cameron                   723.0     178.0   \n",
       "1     Color     Gore Verbinski                   302.0     169.0   \n",
       "2     Color         Sam Mendes                   602.0     148.0   \n",
       "3     Color  Christopher Nolan                   813.0     164.0   \n",
       "5     Color     Andrew Stanton                   462.0     132.0   \n",
       "...     ...                ...                     ...       ...   \n",
       "5026  Color    Olivier Assayas                    81.0     110.0   \n",
       "5027  Color       Jafar Panahi                    64.0      90.0   \n",
       "5033  Color      Shane Carruth                   143.0      77.0   \n",
       "5035  Color   Robert Rodriguez                    56.0      81.0   \n",
       "5042  Color           Jon Gunn                    43.0      90.0   \n",
       "\n",
       "      director_facebook_likes  actor_3_facebook_likes       actor_2_name  \\\n",
       "0                         0.0                   855.0   Joel David Moore   \n",
       "1                       563.0                  1000.0      Orlando Bloom   \n",
       "2                         0.0                   161.0       Rory Kinnear   \n",
       "3                     22000.0                 23000.0     Christian Bale   \n",
       "5                       475.0                   530.0    Samantha Morton   \n",
       "...                       ...                     ...                ...   \n",
       "5026                    107.0                    45.0     Béatrice Dalle   \n",
       "5027                    397.0                     0.0  Nargess Mamizadeh   \n",
       "5033                    291.0                     8.0     David Sullivan   \n",
       "5035                      0.0                     6.0    Peter Marquardt   \n",
       "5042                     16.0                    16.0   Brian Herzlinger   \n",
       "\n",
       "      actor_1_facebook_likes        gross  \\\n",
       "0                     1000.0  760505847.0   \n",
       "1                    40000.0  309404152.0   \n",
       "2                    11000.0  200074175.0   \n",
       "3                    27000.0  448130642.0   \n",
       "5                      640.0   73058679.0   \n",
       "...                      ...          ...   \n",
       "5026                   576.0     136007.0   \n",
       "5027                     5.0     673780.0   \n",
       "5033                   291.0     424760.0   \n",
       "5035                   121.0    2040920.0   \n",
       "5042                    86.0      85222.0   \n",
       "\n",
       "                                   genres  ... num_user_for_reviews language  \\\n",
       "0         Action|Adventure|Fantasy|Sci-Fi  ...               3054.0  English   \n",
       "1                Action|Adventure|Fantasy  ...               1238.0  English   \n",
       "2               Action|Adventure|Thriller  ...                994.0  English   \n",
       "3                         Action|Thriller  ...               2701.0  English   \n",
       "5                 Action|Adventure|Sci-Fi  ...                738.0  English   \n",
       "...                                   ...  ...                  ...      ...   \n",
       "5026                  Drama|Music|Romance  ...                 39.0   French   \n",
       "5027                                Drama  ...                 26.0  Persian   \n",
       "5033                Drama|Sci-Fi|Thriller  ...                371.0  English   \n",
       "5035  Action|Crime|Drama|Romance|Thriller  ...                130.0  Spanish   \n",
       "5042                          Documentary  ...                 84.0  English   \n",
       "\n",
       "      country  content_rating       budget  title_year actor_2_facebook_likes  \\\n",
       "0         USA           PG-13  237000000.0      2009.0                  936.0   \n",
       "1         USA           PG-13  300000000.0      2007.0                 5000.0   \n",
       "2          UK           PG-13  245000000.0      2015.0                  393.0   \n",
       "3         USA           PG-13  250000000.0      2012.0                23000.0   \n",
       "5         USA           PG-13  263700000.0      2012.0                  632.0   \n",
       "...       ...             ...          ...         ...                    ...   \n",
       "5026   France               R       4500.0      2004.0                  133.0   \n",
       "5027     Iran       Not Rated      10000.0      2000.0                    0.0   \n",
       "5033      USA           PG-13       7000.0      2004.0                   45.0   \n",
       "5035      USA               R       7000.0      1992.0                   20.0   \n",
       "5042      USA              PG       1100.0      2004.0                   23.0   \n",
       "\n",
       "     imdb_score  aspect_ratio movie_facebook_likes  \n",
       "0           7.9          1.78                33000  \n",
       "1           7.1          2.35                    0  \n",
       "2           6.8          2.35                85000  \n",
       "3           8.5          2.35               164000  \n",
       "5           6.6          2.35                24000  \n",
       "...         ...           ...                  ...  \n",
       "5026        6.9          2.35                  171  \n",
       "5027        7.5          1.85                  697  \n",
       "5033        7.0          1.85                19000  \n",
       "5035        6.9          1.37                    0  \n",
       "5042        6.6          1.85                  456  \n",
       "\n",
       "[3756 rows x 28 columns]"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.dropna()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "a362d618",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>director_name</th>\n",
       "      <th>num_critic_for_reviews</th>\n",
       "      <th>duration</th>\n",
       "      <th>director_facebook_likes</th>\n",
       "      <th>actor_3_facebook_likes</th>\n",
       "      <th>actor_2_name</th>\n",
       "      <th>actor_1_facebook_likes</th>\n",
       "      <th>gross</th>\n",
       "      <th>genres</th>\n",
       "      <th>...</th>\n",
       "      <th>num_user_for_reviews</th>\n",
       "      <th>language</th>\n",
       "      <th>country</th>\n",
       "      <th>content_rating</th>\n",
       "      <th>budget</th>\n",
       "      <th>title_year</th>\n",
       "      <th>actor_2_facebook_likes</th>\n",
       "      <th>imdb_score</th>\n",
       "      <th>aspect_ratio</th>\n",
       "      <th>movie_facebook_likes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Color</td>\n",
       "      <td>James Cameron</td>\n",
       "      <td>723.0</td>\n",
       "      <td>178.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>855.0</td>\n",
       "      <td>Joel David Moore</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>760505847.0</td>\n",
       "      <td>Action|Adventure|Fantasy|Sci-Fi</td>\n",
       "      <td>...</td>\n",
       "      <td>3054.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>237000000.0</td>\n",
       "      <td>2009.0</td>\n",
       "      <td>936.0</td>\n",
       "      <td>7.9</td>\n",
       "      <td>1.78</td>\n",
       "      <td>33000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Color</td>\n",
       "      <td>Gore Verbinski</td>\n",
       "      <td>302.0</td>\n",
       "      <td>169.0</td>\n",
       "      <td>563.0</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>Orlando Bloom</td>\n",
       "      <td>40000.0</td>\n",
       "      <td>309404152.0</td>\n",
       "      <td>Action|Adventure|Fantasy</td>\n",
       "      <td>...</td>\n",
       "      <td>1238.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>300000000.0</td>\n",
       "      <td>2007.0</td>\n",
       "      <td>5000.0</td>\n",
       "      <td>7.1</td>\n",
       "      <td>2.35</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Color</td>\n",
       "      <td>Sam Mendes</td>\n",
       "      <td>602.0</td>\n",
       "      <td>148.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>161.0</td>\n",
       "      <td>Rory Kinnear</td>\n",
       "      <td>11000.0</td>\n",
       "      <td>200074175.0</td>\n",
       "      <td>Action|Adventure|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>994.0</td>\n",
       "      <td>English</td>\n",
       "      <td>UK</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>245000000.0</td>\n",
       "      <td>2015.0</td>\n",
       "      <td>393.0</td>\n",
       "      <td>6.8</td>\n",
       "      <td>2.35</td>\n",
       "      <td>85000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Color</td>\n",
       "      <td>Christopher Nolan</td>\n",
       "      <td>813.0</td>\n",
       "      <td>164.0</td>\n",
       "      <td>22000.0</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>Christian Bale</td>\n",
       "      <td>27000.0</td>\n",
       "      <td>448130642.0</td>\n",
       "      <td>Action|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>2701.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>250000000.0</td>\n",
       "      <td>2012.0</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>8.5</td>\n",
       "      <td>2.35</td>\n",
       "      <td>164000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Color</td>\n",
       "      <td>Andrew Stanton</td>\n",
       "      <td>462.0</td>\n",
       "      <td>132.0</td>\n",
       "      <td>475.0</td>\n",
       "      <td>530.0</td>\n",
       "      <td>Samantha Morton</td>\n",
       "      <td>640.0</td>\n",
       "      <td>73058679.0</td>\n",
       "      <td>Action|Adventure|Sci-Fi</td>\n",
       "      <td>...</td>\n",
       "      <td>738.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>263700000.0</td>\n",
       "      <td>2012.0</td>\n",
       "      <td>632.0</td>\n",
       "      <td>6.6</td>\n",
       "      <td>2.35</td>\n",
       "      <td>24000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5026</th>\n",
       "      <td>Color</td>\n",
       "      <td>Olivier Assayas</td>\n",
       "      <td>81.0</td>\n",
       "      <td>110.0</td>\n",
       "      <td>107.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>Béatrice Dalle</td>\n",
       "      <td>576.0</td>\n",
       "      <td>136007.0</td>\n",
       "      <td>Drama|Music|Romance</td>\n",
       "      <td>...</td>\n",
       "      <td>39.0</td>\n",
       "      <td>French</td>\n",
       "      <td>France</td>\n",
       "      <td>R</td>\n",
       "      <td>4500.0</td>\n",
       "      <td>2004.0</td>\n",
       "      <td>133.0</td>\n",
       "      <td>6.9</td>\n",
       "      <td>2.35</td>\n",
       "      <td>171</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5027</th>\n",
       "      <td>Color</td>\n",
       "      <td>Jafar Panahi</td>\n",
       "      <td>64.0</td>\n",
       "      <td>90.0</td>\n",
       "      <td>397.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Nargess Mamizadeh</td>\n",
       "      <td>5.0</td>\n",
       "      <td>673780.0</td>\n",
       "      <td>Drama</td>\n",
       "      <td>...</td>\n",
       "      <td>26.0</td>\n",
       "      <td>Persian</td>\n",
       "      <td>Iran</td>\n",
       "      <td>Not Rated</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>2000.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7.5</td>\n",
       "      <td>1.85</td>\n",
       "      <td>697</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5033</th>\n",
       "      <td>Color</td>\n",
       "      <td>Shane Carruth</td>\n",
       "      <td>143.0</td>\n",
       "      <td>77.0</td>\n",
       "      <td>291.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>David Sullivan</td>\n",
       "      <td>291.0</td>\n",
       "      <td>424760.0</td>\n",
       "      <td>Drama|Sci-Fi|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>371.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>7000.0</td>\n",
       "      <td>2004.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>1.85</td>\n",
       "      <td>19000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5035</th>\n",
       "      <td>Color</td>\n",
       "      <td>Robert Rodriguez</td>\n",
       "      <td>56.0</td>\n",
       "      <td>81.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>Peter Marquardt</td>\n",
       "      <td>121.0</td>\n",
       "      <td>2040920.0</td>\n",
       "      <td>Action|Crime|Drama|Romance|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>130.0</td>\n",
       "      <td>Spanish</td>\n",
       "      <td>USA</td>\n",
       "      <td>R</td>\n",
       "      <td>7000.0</td>\n",
       "      <td>1992.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>6.9</td>\n",
       "      <td>1.37</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5042</th>\n",
       "      <td>Color</td>\n",
       "      <td>Jon Gunn</td>\n",
       "      <td>43.0</td>\n",
       "      <td>90.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>Brian Herzlinger</td>\n",
       "      <td>86.0</td>\n",
       "      <td>85222.0</td>\n",
       "      <td>Documentary</td>\n",
       "      <td>...</td>\n",
       "      <td>84.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG</td>\n",
       "      <td>1100.0</td>\n",
       "      <td>2004.0</td>\n",
       "      <td>23.0</td>\n",
       "      <td>6.6</td>\n",
       "      <td>1.85</td>\n",
       "      <td>456</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3756 rows × 28 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      color      director_name  num_critic_for_reviews  duration  \\\n",
       "0     Color      James Cameron                   723.0     178.0   \n",
       "1     Color     Gore Verbinski                   302.0     169.0   \n",
       "2     Color         Sam Mendes                   602.0     148.0   \n",
       "3     Color  Christopher Nolan                   813.0     164.0   \n",
       "5     Color     Andrew Stanton                   462.0     132.0   \n",
       "...     ...                ...                     ...       ...   \n",
       "5026  Color    Olivier Assayas                    81.0     110.0   \n",
       "5027  Color       Jafar Panahi                    64.0      90.0   \n",
       "5033  Color      Shane Carruth                   143.0      77.0   \n",
       "5035  Color   Robert Rodriguez                    56.0      81.0   \n",
       "5042  Color           Jon Gunn                    43.0      90.0   \n",
       "\n",
       "      director_facebook_likes  actor_3_facebook_likes       actor_2_name  \\\n",
       "0                         0.0                   855.0   Joel David Moore   \n",
       "1                       563.0                  1000.0      Orlando Bloom   \n",
       "2                         0.0                   161.0       Rory Kinnear   \n",
       "3                     22000.0                 23000.0     Christian Bale   \n",
       "5                       475.0                   530.0    Samantha Morton   \n",
       "...                       ...                     ...                ...   \n",
       "5026                    107.0                    45.0     Béatrice Dalle   \n",
       "5027                    397.0                     0.0  Nargess Mamizadeh   \n",
       "5033                    291.0                     8.0     David Sullivan   \n",
       "5035                      0.0                     6.0    Peter Marquardt   \n",
       "5042                     16.0                    16.0   Brian Herzlinger   \n",
       "\n",
       "      actor_1_facebook_likes        gross  \\\n",
       "0                     1000.0  760505847.0   \n",
       "1                    40000.0  309404152.0   \n",
       "2                    11000.0  200074175.0   \n",
       "3                    27000.0  448130642.0   \n",
       "5                      640.0   73058679.0   \n",
       "...                      ...          ...   \n",
       "5026                   576.0     136007.0   \n",
       "5027                     5.0     673780.0   \n",
       "5033                   291.0     424760.0   \n",
       "5035                   121.0    2040920.0   \n",
       "5042                    86.0      85222.0   \n",
       "\n",
       "                                   genres  ... num_user_for_reviews language  \\\n",
       "0         Action|Adventure|Fantasy|Sci-Fi  ...               3054.0  English   \n",
       "1                Action|Adventure|Fantasy  ...               1238.0  English   \n",
       "2               Action|Adventure|Thriller  ...                994.0  English   \n",
       "3                         Action|Thriller  ...               2701.0  English   \n",
       "5                 Action|Adventure|Sci-Fi  ...                738.0  English   \n",
       "...                                   ...  ...                  ...      ...   \n",
       "5026                  Drama|Music|Romance  ...                 39.0   French   \n",
       "5027                                Drama  ...                 26.0  Persian   \n",
       "5033                Drama|Sci-Fi|Thriller  ...                371.0  English   \n",
       "5035  Action|Crime|Drama|Romance|Thriller  ...                130.0  Spanish   \n",
       "5042                          Documentary  ...                 84.0  English   \n",
       "\n",
       "      country  content_rating       budget  title_year actor_2_facebook_likes  \\\n",
       "0         USA           PG-13  237000000.0      2009.0                  936.0   \n",
       "1         USA           PG-13  300000000.0      2007.0                 5000.0   \n",
       "2          UK           PG-13  245000000.0      2015.0                  393.0   \n",
       "3         USA           PG-13  250000000.0      2012.0                23000.0   \n",
       "5         USA           PG-13  263700000.0      2012.0                  632.0   \n",
       "...       ...             ...          ...         ...                    ...   \n",
       "5026   France               R       4500.0      2004.0                  133.0   \n",
       "5027     Iran       Not Rated      10000.0      2000.0                    0.0   \n",
       "5033      USA           PG-13       7000.0      2004.0                   45.0   \n",
       "5035      USA               R       7000.0      1992.0                   20.0   \n",
       "5042      USA              PG       1100.0      2004.0                   23.0   \n",
       "\n",
       "     imdb_score  aspect_ratio movie_facebook_likes  \n",
       "0           7.9          1.78                33000  \n",
       "1           7.1          2.35                    0  \n",
       "2           6.8          2.35                85000  \n",
       "3           8.5          2.35               164000  \n",
       "5           6.6          2.35                24000  \n",
       "...         ...           ...                  ...  \n",
       "5026        6.9          2.35                  171  \n",
       "5027        7.5          1.85                  697  \n",
       "5033        7.0          1.85                19000  \n",
       "5035        6.9          1.37                    0  \n",
       "5042        6.6          1.85                  456  \n",
       "\n",
       "[3756 rows x 28 columns]"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.dropna(how='any')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "337eec26",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False    23\n",
       "True      5\n",
       "Name: 5039, dtype: int64"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.loc[5039].isnull().value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "23e08acd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>director_name</th>\n",
       "      <th>num_critic_for_reviews</th>\n",
       "      <th>duration</th>\n",
       "      <th>director_facebook_likes</th>\n",
       "      <th>actor_3_facebook_likes</th>\n",
       "      <th>actor_2_name</th>\n",
       "      <th>actor_1_facebook_likes</th>\n",
       "      <th>gross</th>\n",
       "      <th>genres</th>\n",
       "      <th>...</th>\n",
       "      <th>num_user_for_reviews</th>\n",
       "      <th>language</th>\n",
       "      <th>country</th>\n",
       "      <th>content_rating</th>\n",
       "      <th>budget</th>\n",
       "      <th>title_year</th>\n",
       "      <th>actor_2_facebook_likes</th>\n",
       "      <th>imdb_score</th>\n",
       "      <th>aspect_ratio</th>\n",
       "      <th>movie_facebook_likes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Color</td>\n",
       "      <td>James Cameron</td>\n",
       "      <td>723.0</td>\n",
       "      <td>178.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>855.0</td>\n",
       "      <td>Joel David Moore</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>760505847.0</td>\n",
       "      <td>Action|Adventure|Fantasy|Sci-Fi</td>\n",
       "      <td>...</td>\n",
       "      <td>3054.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>237000000.0</td>\n",
       "      <td>2009.0</td>\n",
       "      <td>936.0</td>\n",
       "      <td>7.9</td>\n",
       "      <td>1.78</td>\n",
       "      <td>33000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Color</td>\n",
       "      <td>Gore Verbinski</td>\n",
       "      <td>302.0</td>\n",
       "      <td>169.0</td>\n",
       "      <td>563.0</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>Orlando Bloom</td>\n",
       "      <td>40000.0</td>\n",
       "      <td>309404152.0</td>\n",
       "      <td>Action|Adventure|Fantasy</td>\n",
       "      <td>...</td>\n",
       "      <td>1238.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>300000000.0</td>\n",
       "      <td>2007.0</td>\n",
       "      <td>5000.0</td>\n",
       "      <td>7.1</td>\n",
       "      <td>2.35</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Color</td>\n",
       "      <td>Sam Mendes</td>\n",
       "      <td>602.0</td>\n",
       "      <td>148.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>161.0</td>\n",
       "      <td>Rory Kinnear</td>\n",
       "      <td>11000.0</td>\n",
       "      <td>200074175.0</td>\n",
       "      <td>Action|Adventure|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>994.0</td>\n",
       "      <td>English</td>\n",
       "      <td>UK</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>245000000.0</td>\n",
       "      <td>2015.0</td>\n",
       "      <td>393.0</td>\n",
       "      <td>6.8</td>\n",
       "      <td>2.35</td>\n",
       "      <td>85000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Color</td>\n",
       "      <td>Christopher Nolan</td>\n",
       "      <td>813.0</td>\n",
       "      <td>164.0</td>\n",
       "      <td>22000.0</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>Christian Bale</td>\n",
       "      <td>27000.0</td>\n",
       "      <td>448130642.0</td>\n",
       "      <td>Action|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>2701.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>250000000.0</td>\n",
       "      <td>2012.0</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>8.5</td>\n",
       "      <td>2.35</td>\n",
       "      <td>164000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Color</td>\n",
       "      <td>Andrew Stanton</td>\n",
       "      <td>462.0</td>\n",
       "      <td>132.0</td>\n",
       "      <td>475.0</td>\n",
       "      <td>530.0</td>\n",
       "      <td>Samantha Morton</td>\n",
       "      <td>640.0</td>\n",
       "      <td>73058679.0</td>\n",
       "      <td>Action|Adventure|Sci-Fi</td>\n",
       "      <td>...</td>\n",
       "      <td>738.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>263700000.0</td>\n",
       "      <td>2012.0</td>\n",
       "      <td>632.0</td>\n",
       "      <td>6.6</td>\n",
       "      <td>2.35</td>\n",
       "      <td>24000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5037</th>\n",
       "      <td>Color</td>\n",
       "      <td>Edward Burns</td>\n",
       "      <td>14.0</td>\n",
       "      <td>95.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>133.0</td>\n",
       "      <td>Caitlin FitzGerald</td>\n",
       "      <td>296.0</td>\n",
       "      <td>4584.0</td>\n",
       "      <td>Comedy|Drama</td>\n",
       "      <td>...</td>\n",
       "      <td>14.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>Not Rated</td>\n",
       "      <td>9000.0</td>\n",
       "      <td>2011.0</td>\n",
       "      <td>205.0</td>\n",
       "      <td>6.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>413</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5038</th>\n",
       "      <td>Color</td>\n",
       "      <td>Scott Smith</td>\n",
       "      <td>1.0</td>\n",
       "      <td>87.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>318.0</td>\n",
       "      <td>Daphne Zuniga</td>\n",
       "      <td>637.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Comedy|Drama</td>\n",
       "      <td>...</td>\n",
       "      <td>6.0</td>\n",
       "      <td>English</td>\n",
       "      <td>Canada</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2013.0</td>\n",
       "      <td>470.0</td>\n",
       "      <td>7.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5040</th>\n",
       "      <td>Color</td>\n",
       "      <td>Benjamin Roberds</td>\n",
       "      <td>13.0</td>\n",
       "      <td>76.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Maxwell Moody</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Drama|Horror|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1400.0</td>\n",
       "      <td>2013.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5041</th>\n",
       "      <td>Color</td>\n",
       "      <td>Daniel Hsia</td>\n",
       "      <td>14.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>489.0</td>\n",
       "      <td>Daniel Henney</td>\n",
       "      <td>946.0</td>\n",
       "      <td>10443.0</td>\n",
       "      <td>Comedy|Drama|Romance</td>\n",
       "      <td>...</td>\n",
       "      <td>9.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2012.0</td>\n",
       "      <td>719.0</td>\n",
       "      <td>6.3</td>\n",
       "      <td>2.35</td>\n",
       "      <td>660</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5042</th>\n",
       "      <td>Color</td>\n",
       "      <td>Jon Gunn</td>\n",
       "      <td>43.0</td>\n",
       "      <td>90.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>Brian Herzlinger</td>\n",
       "      <td>86.0</td>\n",
       "      <td>85222.0</td>\n",
       "      <td>Documentary</td>\n",
       "      <td>...</td>\n",
       "      <td>84.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG</td>\n",
       "      <td>1100.0</td>\n",
       "      <td>2004.0</td>\n",
       "      <td>23.0</td>\n",
       "      <td>6.6</td>\n",
       "      <td>1.85</td>\n",
       "      <td>456</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>4902 rows × 28 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      color      director_name  num_critic_for_reviews  duration  \\\n",
       "0     Color      James Cameron                   723.0     178.0   \n",
       "1     Color     Gore Verbinski                   302.0     169.0   \n",
       "2     Color         Sam Mendes                   602.0     148.0   \n",
       "3     Color  Christopher Nolan                   813.0     164.0   \n",
       "5     Color     Andrew Stanton                   462.0     132.0   \n",
       "...     ...                ...                     ...       ...   \n",
       "5037  Color       Edward Burns                    14.0      95.0   \n",
       "5038  Color        Scott Smith                     1.0      87.0   \n",
       "5040  Color   Benjamin Roberds                    13.0      76.0   \n",
       "5041  Color        Daniel Hsia                    14.0     100.0   \n",
       "5042  Color           Jon Gunn                    43.0      90.0   \n",
       "\n",
       "      director_facebook_likes  actor_3_facebook_likes        actor_2_name  \\\n",
       "0                         0.0                   855.0    Joel David Moore   \n",
       "1                       563.0                  1000.0       Orlando Bloom   \n",
       "2                         0.0                   161.0        Rory Kinnear   \n",
       "3                     22000.0                 23000.0      Christian Bale   \n",
       "5                       475.0                   530.0     Samantha Morton   \n",
       "...                       ...                     ...                 ...   \n",
       "5037                      0.0                   133.0  Caitlin FitzGerald   \n",
       "5038                      2.0                   318.0       Daphne Zuniga   \n",
       "5040                      0.0                     0.0       Maxwell Moody   \n",
       "5041                      0.0                   489.0       Daniel Henney   \n",
       "5042                     16.0                    16.0    Brian Herzlinger   \n",
       "\n",
       "      actor_1_facebook_likes        gross                           genres  \\\n",
       "0                     1000.0  760505847.0  Action|Adventure|Fantasy|Sci-Fi   \n",
       "1                    40000.0  309404152.0         Action|Adventure|Fantasy   \n",
       "2                    11000.0  200074175.0        Action|Adventure|Thriller   \n",
       "3                    27000.0  448130642.0                  Action|Thriller   \n",
       "5                      640.0   73058679.0          Action|Adventure|Sci-Fi   \n",
       "...                      ...          ...                              ...   \n",
       "5037                   296.0       4584.0                     Comedy|Drama   \n",
       "5038                   637.0          NaN                     Comedy|Drama   \n",
       "5040                     0.0          NaN            Drama|Horror|Thriller   \n",
       "5041                   946.0      10443.0             Comedy|Drama|Romance   \n",
       "5042                    86.0      85222.0                      Documentary   \n",
       "\n",
       "      ... num_user_for_reviews language  country  content_rating       budget  \\\n",
       "0     ...               3054.0  English      USA           PG-13  237000000.0   \n",
       "1     ...               1238.0  English      USA           PG-13  300000000.0   \n",
       "2     ...                994.0  English       UK           PG-13  245000000.0   \n",
       "3     ...               2701.0  English      USA           PG-13  250000000.0   \n",
       "5     ...                738.0  English      USA           PG-13  263700000.0   \n",
       "...   ...                  ...      ...      ...             ...          ...   \n",
       "5037  ...                 14.0  English      USA       Not Rated       9000.0   \n",
       "5038  ...                  6.0  English   Canada             NaN          NaN   \n",
       "5040  ...                  3.0  English      USA             NaN       1400.0   \n",
       "5041  ...                  9.0  English      USA           PG-13          NaN   \n",
       "5042  ...                 84.0  English      USA              PG       1100.0   \n",
       "\n",
       "      title_year actor_2_facebook_likes imdb_score  aspect_ratio  \\\n",
       "0         2009.0                  936.0        7.9          1.78   \n",
       "1         2007.0                 5000.0        7.1          2.35   \n",
       "2         2015.0                  393.0        6.8          2.35   \n",
       "3         2012.0                23000.0        8.5          2.35   \n",
       "5         2012.0                  632.0        6.6          2.35   \n",
       "...          ...                    ...        ...           ...   \n",
       "5037      2011.0                  205.0        6.4           NaN   \n",
       "5038      2013.0                  470.0        7.7           NaN   \n",
       "5040      2013.0                    0.0        6.3           NaN   \n",
       "5041      2012.0                  719.0        6.3          2.35   \n",
       "5042      2004.0                   23.0        6.6          1.85   \n",
       "\n",
       "     movie_facebook_likes  \n",
       "0                   33000  \n",
       "1                       0  \n",
       "2                   85000  \n",
       "3                  164000  \n",
       "5                   24000  \n",
       "...                   ...  \n",
       "5037                  413  \n",
       "5038                   84  \n",
       "5040                   16  \n",
       "5041                  660  \n",
       "5042                  456  \n",
       "\n",
       "[4902 rows x 28 columns]"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.dropna(thresh=24)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "4002393d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>duration</th>\n",
       "      <th>genres</th>\n",
       "      <th>movie_title</th>\n",
       "      <th>num_voted_users</th>\n",
       "      <th>cast_total_facebook_likes</th>\n",
       "      <th>movie_imdb_link</th>\n",
       "      <th>country</th>\n",
       "      <th>imdb_score</th>\n",
       "      <th>movie_facebook_likes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>178.000000</td>\n",
       "      <td>Action|Adventure|Fantasy|Sci-Fi</td>\n",
       "      <td>Avatar</td>\n",
       "      <td>886204</td>\n",
       "      <td>4834</td>\n",
       "      <td>http://www.imdb.com/title/tt0499549/?ref_=fn_t...</td>\n",
       "      <td>USA</td>\n",
       "      <td>7.9</td>\n",
       "      <td>33000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>169.000000</td>\n",
       "      <td>Action|Adventure|Fantasy</td>\n",
       "      <td>Pirates of the Caribbean: At World's End</td>\n",
       "      <td>471220</td>\n",
       "      <td>48350</td>\n",
       "      <td>http://www.imdb.com/title/tt0449088/?ref_=fn_t...</td>\n",
       "      <td>USA</td>\n",
       "      <td>7.1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>148.000000</td>\n",
       "      <td>Action|Adventure|Thriller</td>\n",
       "      <td>Spectre</td>\n",
       "      <td>275868</td>\n",
       "      <td>11700</td>\n",
       "      <td>http://www.imdb.com/title/tt2379713/?ref_=fn_t...</td>\n",
       "      <td>UK</td>\n",
       "      <td>6.8</td>\n",
       "      <td>85000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>164.000000</td>\n",
       "      <td>Action|Thriller</td>\n",
       "      <td>The Dark Knight Rises</td>\n",
       "      <td>1144337</td>\n",
       "      <td>106759</td>\n",
       "      <td>http://www.imdb.com/title/tt1345836/?ref_=fn_t...</td>\n",
       "      <td>USA</td>\n",
       "      <td>8.5</td>\n",
       "      <td>164000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>107.201074</td>\n",
       "      <td>Documentary</td>\n",
       "      <td>Star Wars: Episode VII - The Force Awakens    ...</td>\n",
       "      <td>8</td>\n",
       "      <td>143</td>\n",
       "      <td>http://www.imdb.com/title/tt5289954/?ref_=fn_t...</td>\n",
       "      <td></td>\n",
       "      <td>7.1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5038</th>\n",
       "      <td>87.000000</td>\n",
       "      <td>Comedy|Drama</td>\n",
       "      <td>Signed Sealed Delivered</td>\n",
       "      <td>629</td>\n",
       "      <td>2283</td>\n",
       "      <td>http://www.imdb.com/title/tt3000844/?ref_=fn_t...</td>\n",
       "      <td>Canada</td>\n",
       "      <td>7.7</td>\n",
       "      <td>84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5039</th>\n",
       "      <td>43.000000</td>\n",
       "      <td>Crime|Drama|Mystery|Thriller</td>\n",
       "      <td>The Following</td>\n",
       "      <td>73839</td>\n",
       "      <td>1753</td>\n",
       "      <td>http://www.imdb.com/title/tt2071645/?ref_=fn_t...</td>\n",
       "      <td>USA</td>\n",
       "      <td>7.5</td>\n",
       "      <td>32000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5040</th>\n",
       "      <td>76.000000</td>\n",
       "      <td>Drama|Horror|Thriller</td>\n",
       "      <td>A Plague So Pleasant</td>\n",
       "      <td>38</td>\n",
       "      <td>0</td>\n",
       "      <td>http://www.imdb.com/title/tt2107644/?ref_=fn_t...</td>\n",
       "      <td>USA</td>\n",
       "      <td>6.3</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5041</th>\n",
       "      <td>100.000000</td>\n",
       "      <td>Comedy|Drama|Romance</td>\n",
       "      <td>Shanghai Calling</td>\n",
       "      <td>1255</td>\n",
       "      <td>2386</td>\n",
       "      <td>http://www.imdb.com/title/tt2070597/?ref_=fn_t...</td>\n",
       "      <td>USA</td>\n",
       "      <td>6.3</td>\n",
       "      <td>660</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5042</th>\n",
       "      <td>90.000000</td>\n",
       "      <td>Documentary</td>\n",
       "      <td>My Date with Drew</td>\n",
       "      <td>4285</td>\n",
       "      <td>163</td>\n",
       "      <td>http://www.imdb.com/title/tt0378407/?ref_=fn_t...</td>\n",
       "      <td>USA</td>\n",
       "      <td>6.6</td>\n",
       "      <td>456</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5043 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        duration                           genres  \\\n",
       "0     178.000000  Action|Adventure|Fantasy|Sci-Fi   \n",
       "1     169.000000         Action|Adventure|Fantasy   \n",
       "2     148.000000        Action|Adventure|Thriller   \n",
       "3     164.000000                  Action|Thriller   \n",
       "4     107.201074                      Documentary   \n",
       "...          ...                              ...   \n",
       "5038   87.000000                     Comedy|Drama   \n",
       "5039   43.000000     Crime|Drama|Mystery|Thriller   \n",
       "5040   76.000000            Drama|Horror|Thriller   \n",
       "5041  100.000000             Comedy|Drama|Romance   \n",
       "5042   90.000000                      Documentary   \n",
       "\n",
       "                                            movie_title  num_voted_users  \\\n",
       "0                                               Avatar            886204   \n",
       "1             Pirates of the Caribbean: At World's End            471220   \n",
       "2                                              Spectre            275868   \n",
       "3                                The Dark Knight Rises           1144337   \n",
       "4     Star Wars: Episode VII - The Force Awakens    ...                8   \n",
       "...                                                 ...              ...   \n",
       "5038                           Signed Sealed Delivered               629   \n",
       "5039                         The Following                         73839   \n",
       "5040                              A Plague So Pleasant                38   \n",
       "5041                                  Shanghai Calling              1255   \n",
       "5042                                 My Date with Drew              4285   \n",
       "\n",
       "      cast_total_facebook_likes  \\\n",
       "0                          4834   \n",
       "1                         48350   \n",
       "2                         11700   \n",
       "3                        106759   \n",
       "4                           143   \n",
       "...                         ...   \n",
       "5038                       2283   \n",
       "5039                       1753   \n",
       "5040                          0   \n",
       "5041                       2386   \n",
       "5042                        163   \n",
       "\n",
       "                                        movie_imdb_link country  imdb_score  \\\n",
       "0     http://www.imdb.com/title/tt0499549/?ref_=fn_t...     USA         7.9   \n",
       "1     http://www.imdb.com/title/tt0449088/?ref_=fn_t...     USA         7.1   \n",
       "2     http://www.imdb.com/title/tt2379713/?ref_=fn_t...      UK         6.8   \n",
       "3     http://www.imdb.com/title/tt1345836/?ref_=fn_t...     USA         8.5   \n",
       "4     http://www.imdb.com/title/tt5289954/?ref_=fn_t...                 7.1   \n",
       "...                                                 ...     ...         ...   \n",
       "5038  http://www.imdb.com/title/tt3000844/?ref_=fn_t...  Canada         7.7   \n",
       "5039  http://www.imdb.com/title/tt2071645/?ref_=fn_t...     USA         7.5   \n",
       "5040  http://www.imdb.com/title/tt2107644/?ref_=fn_t...     USA         6.3   \n",
       "5041  http://www.imdb.com/title/tt2070597/?ref_=fn_t...     USA         6.3   \n",
       "5042  http://www.imdb.com/title/tt0378407/?ref_=fn_t...     USA         6.6   \n",
       "\n",
       "      movie_facebook_likes  \n",
       "0                    33000  \n",
       "1                        0  \n",
       "2                    85000  \n",
       "3                   164000  \n",
       "4                        0  \n",
       "...                    ...  \n",
       "5038                    84  \n",
       "5039                 32000  \n",
       "5040                    16  \n",
       "5041                   660  \n",
       "5042                   456  \n",
       "\n",
       "[5043 rows x 9 columns]"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.dropna(axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "id": "62d6493f",
   "metadata": {},
   "outputs": [],
   "source": [
    "data.title_year = data.title_year.fillna(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "id": "2a6ba200",
   "metadata": {},
   "outputs": [],
   "source": [
    "data.title_year = data.title_year.astype('int')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "d81dae59",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0       2009\n",
       "1       2007\n",
       "2       2015\n",
       "3       2012\n",
       "4          0\n",
       "        ... \n",
       "5038    2013\n",
       "5039       0\n",
       "5040    2013\n",
       "5041    2012\n",
       "5042    2004\n",
       "Name: title_year, Length: 5043, dtype: int32"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.title_year"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "id": "c1ccd557",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 5043 entries, 0 to 5042\n",
      "Data columns (total 28 columns):\n",
      " #   Column                     Non-Null Count  Dtype  \n",
      "---  ------                     --------------  -----  \n",
      " 0   color                      5024 non-null   object \n",
      " 1   director_name              4939 non-null   object \n",
      " 2   num_critic_for_reviews     4993 non-null   float64\n",
      " 3   duration                   5043 non-null   float64\n",
      " 4   director_facebook_likes    4939 non-null   float64\n",
      " 5   actor_3_facebook_likes     5020 non-null   float64\n",
      " 6   actor_2_name               5030 non-null   object \n",
      " 7   actor_1_facebook_likes     5036 non-null   float64\n",
      " 8   gross                      4159 non-null   float64\n",
      " 9   genres                     5043 non-null   object \n",
      " 10  actor_1_name               5036 non-null   object \n",
      " 11  movie_title                5043 non-null   object \n",
      " 12  num_voted_users            5043 non-null   int64  \n",
      " 13  cast_total_facebook_likes  5043 non-null   int64  \n",
      " 14  actor_3_name               5020 non-null   object \n",
      " 15  facenumber_in_poster       5030 non-null   float64\n",
      " 16  plot_keywords              4890 non-null   object \n",
      " 17  movie_imdb_link            5043 non-null   object \n",
      " 18  num_user_for_reviews       5022 non-null   float64\n",
      " 19  language                   5031 non-null   object \n",
      " 20  country                    5043 non-null   object \n",
      " 21  content_rating             4740 non-null   object \n",
      " 22  budget                     4551 non-null   float64\n",
      " 23  title_year                 5043 non-null   int32  \n",
      " 24  actor_2_facebook_likes     5030 non-null   float64\n",
      " 25  imdb_score                 5043 non-null   float64\n",
      " 26  aspect_ratio               4714 non-null   float64\n",
      " 27  movie_facebook_likes       5043 non-null   int64  \n",
      "dtypes: float64(12), int32(1), int64(3), object(12)\n",
      "memory usage: 1.1+ MB\n"
     ]
    }
   ],
   "source": [
    "data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "id": "c69c53f3",
   "metadata": {},
   "outputs": [],
   "source": [
    "data = pd.read_csv('./data/movie_metadata.csv', dtype={'title_year':str})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "faac934f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>director_name</th>\n",
       "      <th>num_critic_for_reviews</th>\n",
       "      <th>duration</th>\n",
       "      <th>director_facebook_likes</th>\n",
       "      <th>actor_3_facebook_likes</th>\n",
       "      <th>actor_2_name</th>\n",
       "      <th>actor_1_facebook_likes</th>\n",
       "      <th>gross</th>\n",
       "      <th>genres</th>\n",
       "      <th>...</th>\n",
       "      <th>num_user_for_reviews</th>\n",
       "      <th>language</th>\n",
       "      <th>country</th>\n",
       "      <th>content_rating</th>\n",
       "      <th>budget</th>\n",
       "      <th>title_year</th>\n",
       "      <th>actor_2_facebook_likes</th>\n",
       "      <th>imdb_score</th>\n",
       "      <th>aspect_ratio</th>\n",
       "      <th>movie_facebook_likes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Color</td>\n",
       "      <td>James Cameron</td>\n",
       "      <td>723.0</td>\n",
       "      <td>178.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>855.0</td>\n",
       "      <td>Joel David Moore</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>760505847.0</td>\n",
       "      <td>Action|Adventure|Fantasy|Sci-Fi</td>\n",
       "      <td>...</td>\n",
       "      <td>3054.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>237000000.0</td>\n",
       "      <td>2009</td>\n",
       "      <td>936.0</td>\n",
       "      <td>7.9</td>\n",
       "      <td>1.78</td>\n",
       "      <td>33000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Color</td>\n",
       "      <td>Gore Verbinski</td>\n",
       "      <td>302.0</td>\n",
       "      <td>169.0</td>\n",
       "      <td>563.0</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>Orlando Bloom</td>\n",
       "      <td>40000.0</td>\n",
       "      <td>309404152.0</td>\n",
       "      <td>Action|Adventure|Fantasy</td>\n",
       "      <td>...</td>\n",
       "      <td>1238.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>300000000.0</td>\n",
       "      <td>2007</td>\n",
       "      <td>5000.0</td>\n",
       "      <td>7.1</td>\n",
       "      <td>2.35</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Color</td>\n",
       "      <td>Sam Mendes</td>\n",
       "      <td>602.0</td>\n",
       "      <td>148.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>161.0</td>\n",
       "      <td>Rory Kinnear</td>\n",
       "      <td>11000.0</td>\n",
       "      <td>200074175.0</td>\n",
       "      <td>Action|Adventure|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>994.0</td>\n",
       "      <td>English</td>\n",
       "      <td>UK</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>245000000.0</td>\n",
       "      <td>2015</td>\n",
       "      <td>393.0</td>\n",
       "      <td>6.8</td>\n",
       "      <td>2.35</td>\n",
       "      <td>85000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Color</td>\n",
       "      <td>Christopher Nolan</td>\n",
       "      <td>813.0</td>\n",
       "      <td>164.0</td>\n",
       "      <td>22000.0</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>Christian Bale</td>\n",
       "      <td>27000.0</td>\n",
       "      <td>448130642.0</td>\n",
       "      <td>Action|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>2701.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>250000000.0</td>\n",
       "      <td>2012</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>8.5</td>\n",
       "      <td>2.35</td>\n",
       "      <td>164000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>NaN</td>\n",
       "      <td>Doug Walker</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>131.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Rob Walker</td>\n",
       "      <td>131.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Documentary</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>12.0</td>\n",
       "      <td>7.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 28 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   color      director_name  num_critic_for_reviews  duration  \\\n",
       "0  Color      James Cameron                   723.0     178.0   \n",
       "1  Color     Gore Verbinski                   302.0     169.0   \n",
       "2  Color         Sam Mendes                   602.0     148.0   \n",
       "3  Color  Christopher Nolan                   813.0     164.0   \n",
       "4    NaN        Doug Walker                     NaN       NaN   \n",
       "\n",
       "   director_facebook_likes  actor_3_facebook_likes      actor_2_name  \\\n",
       "0                      0.0                   855.0  Joel David Moore   \n",
       "1                    563.0                  1000.0     Orlando Bloom   \n",
       "2                      0.0                   161.0      Rory Kinnear   \n",
       "3                  22000.0                 23000.0    Christian Bale   \n",
       "4                    131.0                     NaN        Rob Walker   \n",
       "\n",
       "   actor_1_facebook_likes        gross                           genres  ...  \\\n",
       "0                  1000.0  760505847.0  Action|Adventure|Fantasy|Sci-Fi  ...   \n",
       "1                 40000.0  309404152.0         Action|Adventure|Fantasy  ...   \n",
       "2                 11000.0  200074175.0        Action|Adventure|Thriller  ...   \n",
       "3                 27000.0  448130642.0                  Action|Thriller  ...   \n",
       "4                   131.0          NaN                      Documentary  ...   \n",
       "\n",
       "  num_user_for_reviews language  country  content_rating       budget  \\\n",
       "0               3054.0  English      USA           PG-13  237000000.0   \n",
       "1               1238.0  English      USA           PG-13  300000000.0   \n",
       "2                994.0  English       UK           PG-13  245000000.0   \n",
       "3               2701.0  English      USA           PG-13  250000000.0   \n",
       "4                  NaN      NaN      NaN             NaN          NaN   \n",
       "\n",
       "   title_year actor_2_facebook_likes imdb_score  aspect_ratio  \\\n",
       "0        2009                  936.0        7.9          1.78   \n",
       "1        2007                 5000.0        7.1          2.35   \n",
       "2        2015                  393.0        6.8          2.35   \n",
       "3        2012                23000.0        8.5          2.35   \n",
       "4         NaN                   12.0        7.1           NaN   \n",
       "\n",
       "  movie_facebook_likes  \n",
       "0                33000  \n",
       "1                    0  \n",
       "2                85000  \n",
       "3               164000  \n",
       "4                    0  \n",
       "\n",
       "[5 rows x 28 columns]"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "169d3e26",
   "metadata": {},
   "outputs": [],
   "source": [
    "data.loc[4,'color'] = 'Color'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "id": "1980daec",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>director_name</th>\n",
       "      <th>num_critic_for_reviews</th>\n",
       "      <th>duration</th>\n",
       "      <th>director_facebook_likes</th>\n",
       "      <th>actor_3_facebook_likes</th>\n",
       "      <th>actor_2_name</th>\n",
       "      <th>actor_1_facebook_likes</th>\n",
       "      <th>gross</th>\n",
       "      <th>genres</th>\n",
       "      <th>...</th>\n",
       "      <th>num_user_for_reviews</th>\n",
       "      <th>language</th>\n",
       "      <th>country</th>\n",
       "      <th>content_rating</th>\n",
       "      <th>budget</th>\n",
       "      <th>title_year</th>\n",
       "      <th>actor_2_facebook_likes</th>\n",
       "      <th>imdb_score</th>\n",
       "      <th>aspect_ratio</th>\n",
       "      <th>movie_facebook_likes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Color</td>\n",
       "      <td>James Cameron</td>\n",
       "      <td>723.0</td>\n",
       "      <td>178.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>855.0</td>\n",
       "      <td>Joel David Moore</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>760505847.0</td>\n",
       "      <td>Action|Adventure|Fantasy|Sci-Fi</td>\n",
       "      <td>...</td>\n",
       "      <td>3054.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>237000000.0</td>\n",
       "      <td>2009</td>\n",
       "      <td>936.0</td>\n",
       "      <td>7.9</td>\n",
       "      <td>1.78</td>\n",
       "      <td>33000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Color</td>\n",
       "      <td>Gore Verbinski</td>\n",
       "      <td>302.0</td>\n",
       "      <td>169.0</td>\n",
       "      <td>563.0</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>Orlando Bloom</td>\n",
       "      <td>40000.0</td>\n",
       "      <td>309404152.0</td>\n",
       "      <td>Action|Adventure|Fantasy</td>\n",
       "      <td>...</td>\n",
       "      <td>1238.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>300000000.0</td>\n",
       "      <td>2007</td>\n",
       "      <td>5000.0</td>\n",
       "      <td>7.1</td>\n",
       "      <td>2.35</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Color</td>\n",
       "      <td>Sam Mendes</td>\n",
       "      <td>602.0</td>\n",
       "      <td>148.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>161.0</td>\n",
       "      <td>Rory Kinnear</td>\n",
       "      <td>11000.0</td>\n",
       "      <td>200074175.0</td>\n",
       "      <td>Action|Adventure|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>994.0</td>\n",
       "      <td>English</td>\n",
       "      <td>UK</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>245000000.0</td>\n",
       "      <td>2015</td>\n",
       "      <td>393.0</td>\n",
       "      <td>6.8</td>\n",
       "      <td>2.35</td>\n",
       "      <td>85000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Color</td>\n",
       "      <td>Christopher Nolan</td>\n",
       "      <td>813.0</td>\n",
       "      <td>164.0</td>\n",
       "      <td>22000.0</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>Christian Bale</td>\n",
       "      <td>27000.0</td>\n",
       "      <td>448130642.0</td>\n",
       "      <td>Action|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>2701.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>250000000.0</td>\n",
       "      <td>2012</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>8.5</td>\n",
       "      <td>2.35</td>\n",
       "      <td>164000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Color</td>\n",
       "      <td>Doug Walker</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>131.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Rob Walker</td>\n",
       "      <td>131.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Documentary</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>12.0</td>\n",
       "      <td>7.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 28 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   color      director_name  num_critic_for_reviews  duration  \\\n",
       "0  Color      James Cameron                   723.0     178.0   \n",
       "1  Color     Gore Verbinski                   302.0     169.0   \n",
       "2  Color         Sam Mendes                   602.0     148.0   \n",
       "3  Color  Christopher Nolan                   813.0     164.0   \n",
       "4  Color        Doug Walker                     NaN       NaN   \n",
       "\n",
       "   director_facebook_likes  actor_3_facebook_likes      actor_2_name  \\\n",
       "0                      0.0                   855.0  Joel David Moore   \n",
       "1                    563.0                  1000.0     Orlando Bloom   \n",
       "2                      0.0                   161.0      Rory Kinnear   \n",
       "3                  22000.0                 23000.0    Christian Bale   \n",
       "4                    131.0                     NaN        Rob Walker   \n",
       "\n",
       "   actor_1_facebook_likes        gross                           genres  ...  \\\n",
       "0                  1000.0  760505847.0  Action|Adventure|Fantasy|Sci-Fi  ...   \n",
       "1                 40000.0  309404152.0         Action|Adventure|Fantasy  ...   \n",
       "2                 11000.0  200074175.0        Action|Adventure|Thriller  ...   \n",
       "3                 27000.0  448130642.0                  Action|Thriller  ...   \n",
       "4                   131.0          NaN                      Documentary  ...   \n",
       "\n",
       "  num_user_for_reviews language  country  content_rating       budget  \\\n",
       "0               3054.0  English      USA           PG-13  237000000.0   \n",
       "1               1238.0  English      USA           PG-13  300000000.0   \n",
       "2                994.0  English       UK           PG-13  245000000.0   \n",
       "3               2701.0  English      USA           PG-13  250000000.0   \n",
       "4                  NaN      NaN      NaN             NaN          NaN   \n",
       "\n",
       "   title_year actor_2_facebook_likes imdb_score  aspect_ratio  \\\n",
       "0        2009                  936.0        7.9          1.78   \n",
       "1        2007                 5000.0        7.1          2.35   \n",
       "2        2015                  393.0        6.8          2.35   \n",
       "3        2012                23000.0        8.5          2.35   \n",
       "4         NaN                   12.0        7.1           NaN   \n",
       "\n",
       "  movie_facebook_likes  \n",
       "0                33000  \n",
       "1                    0  \n",
       "2                85000  \n",
       "3               164000  \n",
       "4                    0  \n",
       "\n",
       "[5 rows x 28 columns]"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "id": "aa9faa86",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0                                                 Avatar \n",
       "1               Pirates of the Caribbean: At World's End \n",
       "2                                                Spectre \n",
       "3                                  The Dark Knight Rises \n",
       "4       Star Wars: Episode VII - The Force Awakens    ...\n",
       "                              ...                        \n",
       "5038                             Signed Sealed Delivered \n",
       "5039                           The Following             \n",
       "5040                                A Plague So Pleasant \n",
       "5041                                    Shanghai Calling \n",
       "5042                                   My Date with Drew \n",
       "Name: movie_title, Length: 5043, dtype: object"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.movie_title"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "id": "ca80eb98",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'ABC'"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "'abc'.upper()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "id": "c9548523",
   "metadata": {},
   "outputs": [],
   "source": [
    "data.movie_title = data.movie_title.str.upper()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "id": "dcd15080",
   "metadata": {},
   "outputs": [],
   "source": [
    "data['movie_title'] = data.movie_title.str.strip()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "id": "7cd19ecf",
   "metadata": {},
   "outputs": [],
   "source": [
    "data = data.rename(columns={\n",
    "    'title_year':'年份',\n",
    "    'movie_title':'电影年份'\n",
    "})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "id": "6088d18c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0       2009\n",
       "1       2007\n",
       "2       2015\n",
       "3       2012\n",
       "4        NaN\n",
       "        ... \n",
       "5038    2013\n",
       "5039     NaN\n",
       "5040    2013\n",
       "5041    2012\n",
       "5042    2004\n",
       "Name: 年份, Length: 5043, dtype: object"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.年份"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "id": "ca4bfe64",
   "metadata": {},
   "outputs": [],
   "source": [
    "data.to_csv('data/new_movie_clean_data.csv',index=None,header=None,encoding='UTF-8')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "72fa6618",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
